sink("log/dbw_emp_script_log.txt", append = FALSE, type = "output")


## You can install the development version of dbw from GitHub:
devtools::install_github("hirotokatsumata/dbw")

## Packages
library(dbw)
library(furrr)
library(Hmisc)
library(tidyverse)
library(foreign)

library(RCAL)
library(kbal)
library(kernlab)
library(glmnet)


## Functions
dbw_y_wls <- function (formula_y, df, fulldf, est_weights, response) {
  df <- data.frame(df, est_weights = est_weights)
  result <- stats::lm(formula = formula_y, data = df, 
                      weights = est_weights, y = TRUE)
  pred <- stats::predict(object = result, newdata = fulldf, type = "response")
  y <- rep(0, length(response))
  y[response == 1] <- result$y
  list(pred = pred, y = y)
}

bbgrid <- function (lambda, df, formula_ps, formula_y, th) {
  res <- std_comp(formula_y = formula_y, 
                  formula_ps = formula_ps, 
                  estimand = "AO",
                  method_y = "wls",
                  data = df,
                  std = TRUE)
  df2 <- res$data
  cov <- str_trim(str_split(str_split(deparse1(formula_ps), '~', simplify = TRUE)[, 2], 
                            '\\+', simplify = TRUE)[1, ], 
                  side = "both")
  covariates <- df2[, cov]
  name_response <- str_trim(str_split(deparse1(formula_ps), '~', simplify = TRUE)[, 1], side = "both")
  response <- c(df2[, name_response])
  resbb <- future_map_dbl(.x = lambda, 
                          .f = gridbb,
                          df = df2, 
                          response = response,
                          cov = covariates,
                          formula_ps = formula_ps,
                          formula_y = formula_y,
                          th = th)
  names(resbb) <- lambda
  resbb
}

gridbb <- function (.x, df, response, cov, formula_ps, formula_y, th) {
  res <- try(emp_est(data = df, formula_y = formula_y, formula_ps = formula_ps, se = FALSE, B = 1, lambda = .x), silent = TRUE)
  if (class(res) != "try-error") {
    bb(result = res, x = cov, treat = response, sigma = 1 / ncol(cov), std = TRUE, th = th)["bbndbw"]
  } else {
    NA
  }
}

emp_est <- function (data, formula_y, formula_ps, se, B, lambda) {
  df <- data

  df <- std_comp(formula_y = formula_y, 
                 formula_ps = formula_ps, 
                 estimand = "AO",
                 method_y = "wls",
                 data = df,
                 std = FALSE)$data

  N <- nrow(df)

  res <- emp_est_internal(.x = 1, se = FALSE, N = N, lambda = lambda, 
                          df = df, formula_y = formula_y, formula_ps = formula_ps, skipcb = FALSE)
  if (B == 1) {
    return(
      list(result = res$result, df = df, 
           ndbw = res$ndbw, mlew = res$mlew, cbw = res$cbw, cbpsw = res$cbpsw, ebalw = res$ebalw)
    )
  }
  if (se == "jk") {
    B <- N
  }
  res2 <- future_map_dfr(.x = 1:B, 
                         .f = emp_est_internal, .options = furrr_options(seed = NULL),
                         se = se, N = N, lambda = lambda, 
                         df = df, formula_y = formula_y, formula_ps = formula_ps,
                         skipcb = res$skipcb)
  result <- rbind(res$result, res2)
  list(result = result, df = df, 
       ndbw = res$ndbw, mlew = res$mlew, cbw = res$cbw, cbpsw = res$cbpsw, ebalw = res$ebalw)
}


emp_est_internal <- function (.x, se, N, lambda, df, formula_y, formula_ps, skipcb = FALSE) {
  if (se == "bs") {
    set.seed(.x * 100)
    bssample <- sample(1:N, N, replace = TRUE)
    df2 <- df[bssample, ]
  } else if (se == "jk") {
    df2 <- df[-.x, ]
    N <- nrow(df2)
  } else {
    df2 <- df
  }
  res <- std_comp(formula_y = formula_y, 
                  formula_ps = formula_ps, 
                  estimand = "AO",
                  method_y = "wls",
                  data = df2,
                  std = TRUE)
  names_x <- str_trim(str_split(str_split(deparse1(formula_ps), '~', simplify = TRUE)[, 2], 
                                '\\+', simplify = TRUE)[1, ], 
                      side = "both")
  x_ps <- as.matrix(res$data[, names_x])
  name_response <- str_trim(str_split(deparse1(formula_ps), '~', simplify = TRUE)[, 1], side = "both")
  response <- c(res$data[, name_response])
  name_y <- str_trim(str_split(deparse1(formula_y), '~', simplify = TRUE)[, 1], side = "both")
  y <- c(res$data[, name_y])

  if (skipcb == FALSE) {
    result_cb <- RCAL::glm.nreg(y = response, x = x_ps, 
                                iw = NULL, loss = "cal", init = NULL)
    if (result_cb$conv == 0) {
      warning("RCAL is not converged")
    }
    cbw <- response / result_cb$fit / N
    est_cb <- sum(response * y * cbw)
  } else {
    cbw <- NA
    est_cb <- NA
  }

  result_ndbw <- dbw(formula_y = formula_y, 
                     formula_ps = res$formula_ps, 
                     estimand = "AO",
                     method = "dbw",
                     method_y = "wls",
                     data = res$data,
                     normalize = TRUE,
                     vcov = FALSE,
                     lambda = lambda,
                     tol = 1e-8,
                     init_lambda = 0.05)

  result_y <- dbw_y_wls(formula_y = formula_y,
                        df = res$data[response == 1, ], 
                        fulldf = res$data, 
                        est_weights = res$weights[response == 1],
                        response = response)

  result_mle <- dbw(formula_y = formula_y, 
                    formula_ps = res$formula_ps, 
                    estimand = "AO",
                    method = "mle",
                    method_y = "wls",
                    data = res$data,
                    normalize = FALSE,
                    vcov = FALSE,
                    lambda = 0)
  est_mle <- sum(response * (result_y$y - result_y$pred) * result_mle$est_weights + 
                  result_y$pred / N)

  unweighted <- mean(y[response == 1])

  result_cbpsw <- RCAL::glm.nreg(y = response, x = x_ps, 
                                 iw = NULL, loss = "bal", init = NULL)
  if (result_cbpsw$conv == 0) {
    warning("CBPSW is not converged")
  }
  cbpsw_weight <- response / result_cbpsw$fit / N
  est_cbpsw <- sum(response * (result_y$y - result_y$pred) * cbpsw_weight + 
                    result_y$pred / N)

  result_eb <- dbw(formula_y = formula_y, 
                   formula_ps = res$formula_ps, 
                   estimand = "AO",
                   method = "eb",
                   method_y = "wls",
                   data = res$data,
                   normalize = TRUE,
                   vcov = FALSE,
                   lambda = 0)
  if (sum(result_eb$est_weights) > 1.01 | sum(result_eb$est_weights) < 0.99) { 
    print(paste(.x, "EB not converged")) 
  }

  result <- data.frame(result_ndbw$est, est_mle, est_cb, est_cbpsw, result_eb$est, unweighted)
  estimator <- c("nDBW DR", "MLE DR", "CAL (DR)", "CBPS (DR)", "EB (DR)", "Unweighted")
  colnames(result) <- estimator
  if (se %in% c("bs", "jk")) {
    result
  } else {
  list(result = result, 
       ndbw = result_ndbw$est_weights / sum(result_ndbw$est_weights), mlew = result_mle$est_weights, 
       cbw = cbw, cbpsw = cbpsw_weight, ebalw = result_eb$est_weights,
       skipcb = 1 - result_cb$conv)
  }
}

calc_se <- function (res1, res2, se, N) {
  result <- res1 - res2
  if (se == "jk") {
    se <- apply(result, 2, sd) * (N - 1) / sqrt(N)
  } else {
    se <- apply(result, 2, sd)
  }
  data.frame(Estimator = colnames(res1), est = unlist(result[1, ]), se = se)
}

## Bias bound from kbal (Hazlett 2020)
bb <- function (result, x, treat, sigma, std, th = 0) {
  x <- as.matrix(x)
  if (std == TRUE) {
    x <- as.matrix(scale(x))
  }
  N <- nrow(x)
  N1 <- sum(treat)
  ndbw <- result$ndbw
  ndbw[treat == 0] <- 0
  ndbw <- ndbw / sum(ndbw) * N1
  ndbw[treat == 0] <- 1
  cbw <- result$cbw
  cbw[treat == 0] <- 0
  cbw <- cbw / sum(cbw) * N1
  cbw[treat == 0] <- 1
  mlew <- result$mlew
  mlew[treat == 0] <- 0
  mlew <- mlew / sum(mlew) * N1
  mlew[treat == 0] <- 1
  cbpsw <- result$cbpsw
  cbpsw[treat == 0] <- 0
  cbpsw <- cbpsw / sum(cbpsw) * N1
  cbpsw[treat == 0] <- 1
  ebalw <- result$ebalw
  ebalw[treat == 0] <- 0
  ebalw <- ebalw / sum(ebalw) * N1
  ebalw[treat == 0] <- 1
  unweighted <- rep(1, N)
  kernm <- kernlab::kernelMatrix(kernel = kernlab::rbfdot(sigma = sigma), x = x)
  kerneigen <- eigen(x = kernm, symmetric = TRUE)
  kerneigen$values[kerneigen$values < th] <- 0
  kernsvd <- list(d = kerneigen$values, u = kerneigen$vectors, v = kerneigen$vectors)
  observed <- treat
  target <- rep(1, N)
  kbalw <- kbal::getw(target = target, observed = observed, svd.U = kernsvd$u[, 1:10])$w
  bbndbw <- kbal::biasbound(observed = observed, target = target, svd.out = kernsvd, w = ndbw, hilbertnorm = 1)
  bbcbw <- kbal::biasbound(observed = observed, target = target, svd.out = kernsvd, w = cbw, hilbertnorm = 1)
  bbmlew <- kbal::biasbound(observed = observed, target = target, svd.out = kernsvd, w = mlew, hilbertnorm = 1)
  bbcbpsw <- kbal::biasbound(observed = observed, target = target, svd.out = kernsvd, w = cbpsw, hilbertnorm = 1)
  bbebalw <- kbal::biasbound(observed = observed, target = target, svd.out = kernsvd, w = ebalw, hilbertnorm = 1)
  bbunw <- kbal::biasbound(observed = observed, target = target, svd.out = kernsvd, w = unweighted, hilbertnorm = 1)
  bbkbalw <- kbal::biasbound(observed = observed, target = target, svd.out = kernsvd, w = kbalw, hilbertnorm = 1)
  l1ndbw <- kbal::getdist(target = target, observed = observed, K = kernm, svd.U = kernsvd$u, w = ndbw)$L1
  l1cbw <- kbal::getdist(target = target, observed = observed, K = kernm, svd.U = kernsvd$u, w = cbw)$L1
  l1mlew <- kbal::getdist(target = target, observed = observed, K = kernm, svd.U = kernsvd$u, w = mlew)$L1
  l1cbpsw <- kbal::getdist(target = target, observed = observed, K = kernm, svd.U = kernsvd$u, w = cbpsw)$L1
  l1ebalw <- kbal::getdist(target = target, observed = observed, K = kernm, svd.U = kernsvd$u, w = ebalw)$L1
  l1unw <- kbal::getdist(target = target, observed = observed, K = kernm, svd.U = kernsvd$u, w = unweighted)$L1
  l1kbalw <- kbal::getdist(target = target, observed = observed, K = kernm, svd.U = kernsvd$u, w = kbalw)$L1
  result <- c(bbndbw, bbmlew, bbcbw, bbcbpsw, bbebalw, bbunw, bbkbalw,
              l1ndbw, l1mlew, l1cbw, l1cbpsw, l1ebalw, l1unw, l1kbalw)
  names(result) <- c("bbndbw", "bbmlew", "bbcbw", "bbcbpsw", "bbebalw", "bbunw", "bbkbalw",
                     "l1ndbw", "l1mlew", "l1cbw", "l1cbpsw", "l1ebalw", "l1unw", "l1kbalw")
  result
}

bbtidy <- function (resultt, resultc, formula_pst, formula_psc, th) {
  treat <- str_trim(str_split(deparse1(formula_pst), '~', simplify = TRUE)[, 1], side = "both")
  control <- str_trim(str_split(deparse1(formula_psc), '~', simplify = TRUE)[, 1], side = "both")
  cov <- str_trim(str_split(str_split(deparse1(formula_pst), '~', simplify = TRUE)[, 2], 
                            '\\+', simplify = TRUE)[1, ], 
                  side = "both")
  bbt <- bb(result = resultt, x = resultt$df[, cov], treat = resultt$df[treat], sigma = 1 / length(cov), std = TRUE, th = th)[1:6]
  bbc <- bb(result = resultc, x = resultc$df[, cov], treat = resultc$df[control], sigma = 1 / length(cov), std = TRUE, th = th)[1:6]
  data.frame(bbt, bbc, Estimator = colnames(resultt$result)[1:6])
}

search_lambda <- function (result) {
  limit <- min(result, na.rm = TRUE) * 1.01
  x <- as.numeric(names(result))
  maxy <- max(result, na.rm = TRUE)
  plot(x, result, ylim = c(0, maxy))
  abline(h = limit, lty = 2)
  result[which(result < limit)]
}

## Functions for covariate balance checks
cov_select <- function (data, formula, y, covariates) {
  x <- model.matrix(formula_lasso, data)[, -1]
  ## Find the best lambda using leave-one-out-cross-validation
  cv.lasso <- cv.glmnet(x = x, y = y, alpha = 1, nfolds = nrow(data))
  ## Fit the model using the best lambda
  res_lasso <- glmnet(x = x, y = y, alpha = 1, lambda = cv.lasso$lambda.min)
  ## Select relevant covariates
  cov_lasso <- names(which(abs(res_lasso$beta[, 1]) > 0))
  cov_nonduplicated <- setdiff(cov_lasso, covariates)
  cov_selected <- as.formula(paste0("~ ", paste0(cov_nonduplicated, collapse = " + ")))
  cov_excluded <- setdiff(covariates, cov_lasso)
  list(cov_selected = cov_selected, cov_excluded = cov_excluded)
}

plot_balance <- function (result, data, formula_ps, formula_y, cov_selected, cov_excluded) {
  n <- nrow(data)
  result_bal <- list(data = data, 
                     formula_ps = formula_ps, 
                     formula_y = formula_y, 
                     original_weights = rep(1, n),
                     estimand = "AO")
  class(result_bal) <- "dbw"
  result_dbw <- result_bal
  result_cbpsw <- result_bal
  result_ebw <- result_bal
  result_dbw$est_weights <- result$ndbw
  result_cbpsw$est_weights <- result$cbpsw
  result_ebw$est_weights <- result$ebalw
  bal_dbw <- plot(result_dbw, addcov = cov_selected, plot = FALSE)
  bal_cbpsw <- plot(result_cbpsw, addcov = cov_selected, plot = FALSE)
  bal_ebw <- plot(result_ebw, addcov = cov_selected, plot = FALSE)
  cov_included <- setdiff(bal_dbw$covariates, c("(Intercept)", cov_excluded))
  bal_dbw <- bal_dbw$diff.adj[bal_dbw$covariates %in% cov_included]
  bal_cbpsw <- bal_cbpsw$diff.adj[bal_cbpsw$covariates %in% cov_included]
  bal_ebw <- bal_ebw$diff.adj[bal_ebw$covariates %in% cov_included]
  order_dbw <- order(abs(bal_dbw))
  covariates <- gsub(pattern = ":", replacement = " * ", cov_included)
  covariates <- gsub(pattern = "I\\(", replacement = "", covariates)
  covariates <- gsub(pattern = "\\^2\\)", replacement = "\\^2", covariates)
  covariates <- gsub(pattern = "pctright", replacement = "Rightist vote share", covariates)
  covariates <- gsub(pattern = "pctleft", replacement = "Leftist vote share", covariates)
  covariates <- gsub(pattern = "near_km", replacement = "Distance from the border", covariates)
  covariates <- gsub(pattern = "logpop", replacement = "Log(Population)", covariates)
  covariates <- gsub(pattern = "log_sd_elev250", replacement = "Raggedness", covariates)
  covariates <- gsub(pattern = "log_elev_mean", replacement = "Elevation", covariates)
  covariates <- gsub(pattern = "log_hectares_scaled", replacement = "Urbanization", covariates)
  covariates <- gsub(pattern = "communications", replacement = "Communications", covariates)
  covariates <- gsub(pattern = "distancetotrain", replacement = "Distance to train stations", covariates)
  df <- data.frame(covariates = factor(covariates, levels = covariates[order_dbw]), 
                   bal_dbw = abs(bal_dbw), 
                   bal_cbpsw = abs(bal_cbpsw), 
                   bal_ebw = abs(bal_ebw))
  maxdiff <- max(c(df$bal_dbw, df$bal_cbpsw, df$bal_ebw))
  legendx <- (maxdiff * 5 / 6)
  cols <- c(grDevices::rgb(225 / 255, 112 / 255, 85 / 255), 
            grDevices::rgb(20 / 255, 179 / 255, 125 / 255), 
            grDevices::rgb(52 / 255, 152 / 255, 219 / 255))
  oldpar <- graphics::par(no.readonly = TRUE)
  on.exit(graphics::par(oldpar), add = TRUE)
  par(mar = c(4.1, 18.1, 3.1, 0.6))
  graphics::plot(x = df$bal_dbw, 
                 y = df$covariates,
                 pch = 21,
                 col = cols[1],
                 cex = 1.8,
                 lwd = 2.8,
                 xlim = c(0, maxdiff),
                 xlab = "", ylab = "",
                 axes = FALSE)
  graphics::par(new = TRUE)
  graphics::plot(x = df$bal_cbpsw, 
                 y = df$covariates,
                 pch = 24,
                 col = cols[2],
                 cex = 1.5,
                 lwd = 2.2,
                 xlim = c(0, maxdiff),
                 xlab = "", ylab = "",
                 axes = FALSE)
  graphics::par(new = TRUE)
  graphics::plot(x = df$bal_ebw, 
                 y = df$covariates, 
                 pch = 22,
                 col = cols[3],
                 cex = 1.7,
                 lwd = 2.2,
                 xlim = c(0, maxdiff),
                 cex.lab  = 1.2,
                 xlab = "Absolute standardized mean difference", ylab = "",
                 yaxt = "n",
                 main = "Covariate balance")
  graphics::abline(v = 0, col = "grey10", lty = "solid")
  graphics::abline(h = seq(1, length(df$covariates)), col = "grey70", lty = "dashed", lwd = 1.2)
  graphics::axis(2, at = c(1:nrow(df)), labels = df$covariates, las = 1)
  graphics::par(xpd = TRUE)
  graphics::legend(x = legendx, y = 5,
                   legend = c("nDBW", "CBPS", "EB"), 
                   col = cols, pch = c(21, 24, 22), pt.cex = 1.5, pt.lwd = 2, yjust = 0,
                   x.intersp = 0.7, y.intersp = 0.9,
                   #bty = "n",
                   bg = "transparent")
  df
}

## Section 6 and Online Supplementary Materials G
## Replication for Ferwerda and Miller (2014)

## Set the number of cores
plan(strategy = multisession(workers = 2))

## Data wrangling
cov <- c("distancetotrain", "communications", "log_hectares_scaled",
         "logpop", "pctright", "pctleft", "log_sd_elev250", "near_km", "log_elev_mean")
df <- read.dta("data/FM_France.dta", convert.factors = FALSE)
df <- df %>%
      rename(sabotage = s) %>%
      mutate(logpop = log(population),
             log_hectares_scaled = log(hectares_scaled),
             log_sd_elev250 = log(sd_elev250),
             log_elev_mean = log(elev_mean),
             north = 1 - south) %>% 
      dplyr::select(sabotage, south, north, all_of(cov)) %>% 
             filter(complete.cases(.))
df15 <- df %>% filter(near_km <= 15 & near_km > 0)
df20 <- df %>% filter(near_km <= 20 & near_km > 5)
df25 <- df %>% filter(near_km <= 25 & near_km > 10)
df30 <- df %>% filter(near_km <= 30 & near_km > 15)
df35 <- df %>% filter(near_km <= 35 & near_km > 20)

df175 <- df %>% filter(near_km <= 17.5 & near_km > 2.5)
df225 <- df %>% filter(near_km <= 22.5 & near_km > 7.5)
df275 <- df %>% filter(near_km <= 27.5 & near_km > 12.5)
df325 <- df %>% filter(near_km <= 32.5 & near_km > 17.5)

## Formula
covariates <- paste(cov, collapse = "+")
formula_y <- as.formula(paste0("sabotage ~ ", covariates))
formula_pst <- as.formula(paste0("south ~ ", covariates))
formula_psc <- as.formula(paste0("north ~ ", covariates))

## Analysis
lambdat <- c(0, seq(0.01, 0.2, by = 0.01))
lambdac <- c(0, seq(0.01, 0.2, by = 0.01))

bbt35 <- bbgrid(lambda = lambdat, df = df35, formula_ps = formula_pst, formula_y = formula_y, th = 0)
# lambda = 0.01 does not converge
bbc35 <- bbgrid(lambda = lambdac, df = df35, formula_ps = formula_psc, formula_y = formula_y, th = 0)
# lambda = 0.01 does not converge

bbt325 <- bbgrid(lambda = lambdat, df = df325, formula_ps = formula_pst, formula_y = formula_y, th = 0)
# lambda = 0.01 does not converge
bbc325 <- bbgrid(lambda = lambdac, df = df325, formula_ps = formula_psc, formula_y = formula_y, th = 0)
# lambda = 0.01 does not converge

bbt30 <- bbgrid(lambda = lambdat, df = df30, formula_ps = formula_pst, formula_y = formula_y, th = 0)
# lambda = 0.01 does not converge
bbc30 <- bbgrid(lambda = lambdac, df = df30, formula_ps = formula_psc, formula_y = formula_y, th = 0)
# lambda = 0.01 does not converge

bbt275 <- bbgrid(lambda = lambdat, df = df275, formula_ps = formula_pst, formula_y = formula_y, th = 0)
# lambda = 0.01 does not converge
bbc275 <- bbgrid(lambda = lambdac, df = df275, formula_ps = formula_psc, formula_y = formula_y, th = 0)

bbt25 <- bbgrid(lambda = lambdat, df = df25, formula_ps = formula_pst, formula_y = formula_y, th = 0)
# lambda = 0.01 does not converge
bbc25 <- bbgrid(lambda = lambdac, df = df25, formula_ps = formula_psc, formula_y = formula_y, th = 0)
# lambda = 0 does not converge

bbt225 <- bbgrid(lambda = lambdat, df = df225, formula_ps = formula_pst, formula_y = formula_y, th = 0)
# lambda = 0.01 does not converge
bbc225 <- bbgrid(lambda = lambdac, df = df225, formula_ps = formula_psc, formula_y = formula_y, th = 0)

bbt20 <- bbgrid(lambda = lambdat, df = df20, formula_ps = formula_pst, formula_y = formula_y, th = 0)
# lambda = 0.01 does not converge
bbc20 <- bbgrid(lambda = lambdac, df = df20, formula_ps = formula_psc, formula_y = formula_y, th = 0)

bbt175 <- bbgrid(lambda = lambdat, df = df175, formula_ps = formula_pst, formula_y = formula_y, th = 0)
bbc175 <- bbgrid(lambda = lambdac, df = df175, formula_ps = formula_psc, formula_y = formula_y, th = 0)

bbt15 <- bbgrid(lambda = lambdat, df = df15, formula_ps = formula_pst, formula_y = formula_y, th = 0)
# lambda = 0.01 does not converge
bbc15 <- bbgrid(lambda = lambdac, df = df15, formula_ps = formula_psc, formula_y = formula_y, th = 0)


B <- 2000
## Note that seed is internally fixed

## Search the smallest value of lambda whose upper bound of bias is smaller than the minimal bias * 1.01
search_lambda(bbt35)
search_lambda(bbc35)
rest35 <- emp_est(data = df35, formula_y = formula_y, formula_ps = formula_pst, se = "jk", B = B, lambda = 0.09)
resc35 <- emp_est(data = df35, formula_y = formula_y, formula_ps = formula_psc, se = "jk", B = B, lambda = 0.05)
result35 <- calc_se(res1 = resc35$result, res2 = rest35$result, se = "jk", N = nrow(rest35$df))
result35

search_lambda(bbt325)
search_lambda(bbc325)
rest325 <- emp_est(data = df325, formula_y = formula_y, formula_ps = formula_pst, se = "jk", B = B, lambda = 0.09)
resc325 <- emp_est(data = df325, formula_y = formula_y, formula_ps = formula_psc, se = "jk", B = B, lambda = 0.06)
result325 <- calc_se(res1 = resc325$result, res2 = rest325$result, se = "jk", N = nrow(rest325$df))
result325

search_lambda(bbt30)
search_lambda(bbc30)
rest30 <- emp_est(data = df30, formula_y = formula_y, formula_ps = formula_pst, se = "jk", B = B, lambda = 0.10)
resc30 <- emp_est(data = df30, formula_y = formula_y, formula_ps = formula_psc, se = "jk", B = B, lambda = 0.03)
result30 <- calc_se(res1 = resc30$result, res2 = rest30$result, se = "jk", N = nrow(rest30$df))
result30

search_lambda(bbt275)
search_lambda(bbc275)
rest275 <- emp_est(data = df275, formula_y = formula_y, formula_ps = formula_pst, se = "jk", B = B, lambda = 0.09)
resc275 <- emp_est(data = df275, formula_y = formula_y, formula_ps = formula_psc, se = "jk", B = B, lambda = 0.03)
result275 <- calc_se(res1 = resc275$result, res2 = rest275$result, se = "jk", N = nrow(rest275$df))
result275

search_lambda(bbt25)
search_lambda(bbc25)
rest25 <- emp_est(data = df25, formula_y = formula_y, formula_ps = formula_pst, se = "jk", B = B, lambda = 0.09)
resc25 <- emp_est(data = df25, formula_y = formula_y, formula_ps = formula_psc, se = "jk", B = B, lambda = 0.02)
result25 <- calc_se(res1 = resc25$result, res2 = rest25$result, se = "jk", N = nrow(rest25$df))
result25

search_lambda(bbt225)
search_lambda(bbc225)
rest225 <- emp_est(data = df225, formula_y = formula_y, formula_ps = formula_pst, se = "jk", B = B, lambda = 0.11)
resc225 <- emp_est(data = df225, formula_y = formula_y, formula_ps = formula_psc, se = "jk", B = B, lambda = 0.03)
result225 <- calc_se(res1 = resc225$result, res2 = rest225$result, se = "jk", N = nrow(rest225$df))
result225

search_lambda(bbt20)
search_lambda(bbc20)
rest20 <- emp_est(data = df20, formula_y = formula_y, formula_ps = formula_pst, se = "jk", B = B, lambda = 0.10)
resc20 <- emp_est(data = df20, formula_y = formula_y, formula_ps = formula_psc, se = "jk", B = B, lambda = 0.04)
result20 <- calc_se(res1 = resc20$result, res2 = rest20$result, se = "jk", N = nrow(rest20$df))
result20

search_lambda(bbt175)
search_lambda(bbc175)
rest175 <- emp_est(data = df175, formula_y = formula_y, formula_ps = formula_pst, se = "jk", B = B, lambda = 0.11)
resc175 <- emp_est(data = df175, formula_y = formula_y, formula_ps = formula_psc, se = "jk", B = B, lambda = 0.03)
result175 <- calc_se(res1 = resc175$result, res2 = rest175$result, se = "jk", N = nrow(rest175$df))
result175

search_lambda(bbt15)
search_lambda(bbc15)
rest15 <- emp_est(data = df15, formula_y = formula_y, formula_ps = formula_pst, se = "jk", B = B, lambda = 0.13)
resc15 <- emp_est(data = df15, formula_y = formula_y, formula_ps = formula_psc, se = "jk", B = B, lambda = 0.04)
result15 <- calc_se(res1 = resc15$result, res2 = rest15$result, se = "jk", N = nrow(rest15$df))
result15


color6 <- c("#e74c3c", "#2ecc71", "#3498db", "#9b59b6", "#34495e", "#f39c12")

estimator_levels <- c("nDBW DR", "MLE DR", "CAL (DR)", "CBPS (DR)", "EB (DR)", "Unweighted")
result1535 <- data.frame(data.frame(result15, distance = 15)) %>%
              bind_rows(., data.frame(result175, distance = 17.5)) %>%
              bind_rows(., data.frame(result20, distance = 20)) %>%
              bind_rows(., data.frame(result225, distance = 22.5)) %>%
              bind_rows(., data.frame(result25, distance = 25)) %>%
              bind_rows(., data.frame(result275, distance = 27.5)) %>%
              bind_rows(., data.frame(result30, distance = 30)) %>%
              bind_rows(., data.frame(result325, distance = 32.5)) %>%
              bind_rows(., data.frame(result35, distance = 35)) %>%
              mutate(Estimator = factor(Estimator, levels = estimator_levels))

## Figure 6 (left panele)
result1535 %>%
    filter(Estimator == "EB (DR)") %>%
    ggplot(., aes(x = distance, y = est, color = Estimator, group = Estimator)) +
    geom_line(linewidth = 0.65, alpha = 0.85) +
    geom_line(data = subset(result1535, Estimator != "EB (DR)"), linewidth = 0.65, alpha = 0.85) +
    geom_hline(yintercept = 0, linetype = "dotted", color = "grey40", linewidth = 0.5) +
    xlab("Distance from the demarcation line") +
    ylab("ATE estimates") +
    scale_color_manual(values = color6, limits = estimator_levels) +
    theme_classic() +
    theme(legend.position = c(0.86, 0.8),
          axis.title = element_text(size = 9.5),
          axis.text = element_text(size = 10),
          plot.title = element_text(hjust = 0.5, size = 12),
          legend.title = element_text(size = 0),
          legend.text = element_text(size = 9.3),
        legend.background = element_rect(fill = NA, colour = NA))
ggsave("figures/fig_6_left.pdf", width = 8.5, height = 10, units = "cm")
#ggsave("figures/FM_france_estimate.pdf", width = 8.5, height = 10, units = "cm")


## Figure 6 (right panel)
result1535 %>%
filter(Estimator == "nDBW DR") %>%
mutate(upper = est + se * qnorm(0.975),
       lower = est + se * qnorm(0.025)) %>%
ggplot(., aes(x = distance, y = est)) +
  geom_line(linewidth = 0.7) +
  geom_ribbon(aes(ymin = lower, ymax = upper), 
              alpha = 0.2, 
              linetype = 0,
              color = "grey") +
  geom_hline(yintercept = 0, linetype = "dotted", color = "grey40", linewidth = 0.5) +
  xlab("Distance from the demarcation line") +
  ylab("ATE estimates") +
  theme_classic() +
  theme(legend.position = c(0.86, 0.2),
        axis.title = element_text(size = 9.5),
        axis.text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 12),
        legend.title = element_text(size = 0),
        legend.text = element_text(size = 9.3),
        legend.background = element_rect(fill = NA, colour = NA))
ggsave("figures/fig_6_right.pdf", width = 8.5, height = 10, units = "cm")
#ggsave("figures/FM_france_ndbw.pdf", width = 8.5, height = 10, units = "cm")


resubb <- 
data.frame(data.frame(bbtidy(resultt = rest15, resultc = resc15, formula_pst = formula_pst, formula_psc = formula_psc, th = 1e-8), 
                      distance = 15)) %>%
  bind_rows(., data.frame(bbtidy(resultt = rest175, resultc = resc175, formula_pst = formula_pst, formula_psc = formula_psc, th = 1e-8), 
            distance = 17.5)) %>%
  bind_rows(., data.frame(bbtidy(resultt = rest20, resultc = resc20, formula_pst = formula_pst, formula_psc = formula_psc, th = 1e-8), 
            distance = 20)) %>%
  bind_rows(., data.frame(bbtidy(resultt = rest225, resultc = resc225, formula_pst = formula_pst, formula_psc = formula_psc, th = 1e-8), 
            distance = 22.5)) %>%
  bind_rows(., data.frame(bbtidy(resultt = rest25, resultc = resc25, formula_pst = formula_pst, formula_psc = formula_psc, th = 1e-8), 
            distance = 25)) %>%
  bind_rows(., data.frame(bbtidy(resultt = rest275, resultc = resc275, formula_pst = formula_pst, formula_psc = formula_psc, th = 1e-8), 
            distance = 27.5)) %>%
  bind_rows(., data.frame(bbtidy(resultt = rest30, resultc = resc30, formula_pst = formula_pst, formula_psc = formula_psc, th = 1e-8), 
            distance = 30)) %>%
  bind_rows(., data.frame(bbtidy(resultt = rest325, resultc = resc325, formula_pst = formula_pst, formula_psc = formula_psc, th = 1e-8), 
            distance = 32.5)) %>%
  bind_rows(., data.frame(bbtidy(resultt = rest35, resultc = resc35, formula_pst = formula_pst, formula_psc = formula_psc, th = 1e-8), 
            distance = 35)) %>%
  mutate(bb = bbt + bbc) %>%
  mutate(Estimator = factor(Estimator, levels = estimator_levels))

## Figure 5
resubb$bb[resubb$Estimator == "CAL (DR)" & resubb$distance >= 30] <- NA
ggplot(resubb, aes(x = distance, y = bb, color = Estimator, group = Estimator)) +
  geom_line(linewidth = 0.65, alpha = 0.85) +
  xlab("Distance from the demarcation line") +
  ylab("Upper bound of bias") +
  scale_color_manual(values = color6) +
  theme_classic() +
  theme(legend.position = c(0.17, 0.75),
        axis.title = element_text(size = 9.5),
        axis.text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 12),
        legend.title = element_text(size = 0),
        legend.text = element_text(size = 9.5),
        legend.background = element_rect(fill = NA, colour = NA))
ggsave("figures/fig_5.pdf", width = 9.5, height = 8.5, units = "cm")
#ggsave("figures/FM_france_ubb.pdf", width = 9.5, height = 8.5, units = "cm")


## LaTeX Table
restbl <- data.frame(left_join(result15, subset(resubb, distance == 15)[, c(3, 5)], by = "Estimator"), 
                     lambdat = 0.13, lambdac = 0.05) %>%
bind_rows(data.frame(left_join(result175, subset(resubb, distance == 17.5)[, c(3, 5)], by = "Estimator"), 
                     lambdat = 0.11, lambdac = 0.06)) %>%
bind_rows(data.frame(left_join(result20, subset(resubb, distance == 20)[, c(3, 5)], by = "Estimator"), 
                     lambdat = 0.10, lambdac = 0.03)) %>%
bind_rows(data.frame(left_join(result225, subset(resubb, distance == 22.5)[, c(3, 5)], by = "Estimator"), 
                     lambdat = 0.11, lambdac = 0.03)) %>%
bind_rows(data.frame(left_join(result25, subset(resubb, distance == 25)[, c(3, 5)], by = "Estimator"), 
                     lambdat = 0.09, lambdac = 0.02)) %>%
bind_rows(data.frame(left_join(result275, subset(resubb, distance == 27.5)[, c(3, 5)], by = "Estimator"), 
                     lambdat = 0.09, lambdac = 0.03)) %>%
bind_rows(data.frame(left_join(result30, subset(resubb, distance == 30)[, c(3, 5)], by = "Estimator"), 
                     lambdat = 0.10, lambdac = 0.04)) %>%
bind_rows(data.frame(left_join(result325, subset(resubb, distance == 32.5)[, c(3, 5)], by = "Estimator"), 
                     lambdat = 0.09, lambdac = 0.03)) %>%
bind_rows(data.frame(left_join(result35, subset(resubb, distance == 35)[, c(3, 5)], by = "Estimator"), 
                     lambdat = 0.09, lambdac = 0.04)) %>%
mutate(est = round(est, digits = 2),
       se = round(se, digits = 2),
       bb = round(bb, digits = 2)) %>%
rename(ATE = est, s.e. = se, bias = bb)

restbl[restbl$Estimator != "nDBW DR", c("lambdat", "lambdac")] <- NA
restbl$Estimator[restbl$Estimator == "CAL (DR)"] <- "Calibrated weighting DR"
restbl$Estimator[restbl$Estimator == "CBPS (DR)"] <- "CBPS DR"
restbl$Estimator[restbl$Estimator == "EB (DR)"] <- "Entropy balancing DR"
restbl$Estimator[restbl$Estimator == "nDBW DR"] <- "\\textbf{nDBW DR}"

clabels <- colnames(restbl %>% select(-Estimator))
clabels[clabels == "bias"] <- "upper bound of bias"
clabels[clabels == "lambdat"] <- "$\\lambda_t$"
clabels[clabels == "lambdac"] <- "$\\lambda_c$"
caption <- "Results for the study of political devolution and resistance activities"
note1 <- 
  paste0("\\parbox{0.95\\textwidth}
  {Notes: This table presents the results for the municipalities within various distances from the demarcation line 
          estimated with the various methods, 
          where the third column shows the upper bound of bias 
          and the last two columns show the hyper-parameters used for the nDBW DR estimator.}")
note2 <- 
  paste0("\\parbox{0.95\\textwidth}
  {Notes: This table presents the results for the municipalities within various distances from the demarcation line 
          estimated with the various methods, 
          where the third column shows the upper bound of bias 
          and the last two columns show the hyper-parameters used for the nDBW DR estimator.
          The blanks for the calibrated weighting DR estimator indicate that it does not estimate weights 
          because of the convergence issue.}")

rgroups <- c(paste0("\\multicolumn{6}{l}{$\\mathbf{0 < \\textbf{(distance from the demarcation line)} < 15}$"),
             paste0("\\multicolumn{6}{l}{$\\mathbf{2.5 < \\textbf{(distance from the demarcation line)} < 17.5}$"),
             paste0("\\multicolumn{6}{l}{$\\mathbf{5 < \\textbf{(distance from the demarcation line)} < 20}$"),
             paste0("\\multicolumn{6}{l}{$\\mathbf{7.5 < \\textbf{(distance from the demarcation line)} < 22.5}$"),
             paste0("\\multicolumn{6}{l}{$\\mathbf{10 < \\textbf{(distance from the demarcation line)} < 25}$"),
             paste0("\\multicolumn{6}{l}{$\\mathbf{12.5 < \\textbf{(distance from the demarcation line)} < 27.5}$"),
             paste0("\\multicolumn{6}{l}{$\\mathbf{15 < \\textbf{(distance from the demarcation line)} < 30}$"),
             paste0("\\multicolumn{6}{l}{$\\mathbf{17.5 < \\textbf{(distance from the demarcation line)} < 32.5}$"),
             paste0("\\multicolumn{6}{l}{$\\mathbf{20 < \\textbf{(distance from the demarcation line)} < 35}$"))

## LaTeX table
## Table G.1 (Online Supplementary Materials G)
latex(object = (restbl %>% select(-Estimator))[1:30, ],
      title = "",
      file = "tables/tab_g1.tex",
      #file = "tables/FM_france1.tex",
      label = "tb_FM_france1",
      caption = caption,
      insert.bottom = note1,
      first.hline.double = FALSE,
      rowname = restbl$Estimator,
      colheads = clabels,
      rgroup = rgroups[1:5],
      n.rgroup = c(rep(6, 5)),
      rgroupTexCmd = "",
      longtable = FALSE,
      center = "centering")

## Table G.2 (Online Supplementary Materials G)
latex(object = (restbl %>% select(-Estimator))[31:54, ],
      title = "",
      file = "tables/tab_g2.tex",
      #file = "tables/FM_france2.tex",
      label = "tb_FM_france2",
      caption = caption,
      insert.bottom = note2,
      first.hline.double = FALSE,
      rowname = restbl$Estimator,
      colheads = clabels,
      rgroup = rgroups[6:9],
      n.rgroup = c(rep(6, 4)),
      rgroupTexCmd = "",
      longtable = FALSE,
      center = "centering")


## Covariate balance
## Select relevant covariates for potential outcomes models
## 20 < near_km <= 35
## For treatment group
df35t_lasso <- std_comp(
               formula_y = formula_y,
               formula_ps = formula_pst,
               estimand = "ATT",
               method_y = "wls",
               data = df35,
               std = TRUE,
               weights = NULL)$data
df35t_lasso <- df35t_lasso[df35t_lasso$south == 1, ]
cov_interacted <- paste0("I(", cov, "^2)", collapse = " + ")
formula_lasso <- as.formula(paste0("~ (", covariates, ")^2 + ", cov_interacted))
# set.seed(20231220) # not required for the LOOCV
cov_selected <- cov_select(data = df35t_lasso, formula = formula_lasso, y = df35t_lasso$sabotage, covariates = cov)
cov_selected

df35_bal <- std_comp(
            formula_y = formula_y,
            formula_ps = formula_pst,
            estimand = "AO",
            method_y = "wls",
            data = df35,
            std = TRUE,
            weights = NULL)$data

## Figure 7
pdf(file = "figures/fig_7.pdf", width = 8.5, height = 6.5)
#pdf(file = "figures/cov_bal.pdf", width = 8.5, height = 6.5)
plot_balance(result = rest35, data = df35_bal, formula_ps = formula_pst, formula_y = formula_y, 
             cov_selected = cov_selected$cov_selected, cov_excluded = cov_selected$cov_excluded)
dev.off()



## Online Supplementary Materials H
## Replication for Ladd and Lenz (2009)

df <- read.dta("data/LaddLenz.dta", convert.factors = FALSE) %>%
      mutate(nontolabor = 1 - tolabor) %>%
      mutate(occupation23 = occupation2 + occupation3) %>%
      filter(white == 1) %>%
      filter(region6 != 1) %>%
      filter(occupation7 != 1) %>%
      filter((occupation23 + occupation4 + occupation5 + occupation6) == 1) %>%
      filter(complete.cases(.))
cov <- df %>%
       select(-c("vote_l_97", "tolabor", "nontolabor", "white", "region6", 
                 "occupation2", "occupation3", "occupation7", "occupation23")) %>%
       colnames()
## Occupation 23 is dropped because one of occupation 23, 4, 5, 6 is 1

## Formulas
covariates <- paste(cov, collapse = "+")
formula_y <- as.formula(paste0("vote_l_97 ~ ", covariates))
formula_pst <- as.formula(paste0("tolabor ~ ", covariates))
formula_psc <- as.formula(paste0("nontolabor ~ ", covariates))

plan(strategy = multisession(workers = 6))
lambdat <- c(0, seq(0.01, 0.3, by = 0.01))
bbt <- bbgrid(lambda = lambdat, df = df, formula_ps = formula_pst, formula_y = formula_y, th = 0)
# lambda = 0.01 does not converge
# lambda = 0.02 does not converge
lambdac <- c(0, seq(0.01, 0.3, by = 0.01))
bbc <- bbgrid(lambda = lambdac, df = df, formula_ps = formula_psc, formula_y = formula_y, th = 0)
search_lambda(bbt)
search_lambda(bbc)

plan(strategy = multisession(workers = 6))
rest <- emp_est(data = df, formula_y = formula_y, formula_ps = formula_pst, se = "jk", B = 2000, lambda = 0.28)
resc <- emp_est(data = df, formula_y = formula_y, formula_ps = formula_psc, se = "jk", B = 2000, lambda = 0.01)
result <- calc_se(res1 = rest$result, res2 = resc$result, se = "jk", N = nrow(rest$df))
result


resbbt <- bb(result = rest, x = rest$df[, cov], treat = rest$df$tolabor, sigma = 1 / length(cov), std = TRUE, th = 0)
resbbc <- bb(result = resc, x = resc$df[, cov], treat = resc$df$nontolabor, sigma = 1 / length(cov), std = TRUE, th = 0)


## Figure H.1
color6 <- c("#e74c3c", "#2ecc71", "#3498db", "#9b59b6", "#34495e", "#f39c12")
estimator_levels <- c("nDBW DR", "MLE DR", "CAL (DR)", "CBPS (DR)", "EB (DR)", "Unweighted")
data.frame(Estimator = factor(estimator_levels, levels = estimator_levels)[1:5], 
           value = (resbbt + resbbc)[1:5],
           estimate = result$est[1:5],
           upper = (result$est + result$se * qnorm(0.975))[1:5],
           lower = (result$est - result$se * qnorm(0.975))[1:5]) %>%
ggplot(., aes(x = value, y = estimate, color = Estimator)) +
  geom_point(size = 3) +
  geom_segment(aes(x = value, xend = value, y = upper, yend = lower), linewidth = 0.6) +
  geom_hline(yintercept = 0, linetype = "dotted", color = "grey40", linewidth = 0.5) +
  annotate("text", x = 0.0805, y = 0.235, label = "nDBW\nDR",
           colour = color6[1], size = 3) +
  annotate("text", x = 0.1043, y = 0.25, label = "MLE\nDR",
           colour = color6[2], size = 3) +
  annotate("text", x = 0.101, y = 0.228, label = "CAL\n(DR)",
           colour = color6[3], size = 3) +
  annotate("text", x = 0.0983, y = 0.209, label = "CBPS\n(DR)",
           colour = color6[4], size = 3) +
  annotate("text", x = 0.0959, y = 0.230, label = "EB\n(DR)",
           colour = color6[5], size = 3) +
  xlab("Upper bound of bias") +
  ylab("ATE estimates") +
  scale_color_manual(values = color6[1:5]) +
  theme_classic() +
  theme(legend.position = "none",
        axis.title = element_text(size = 9.5),
        axis.text = element_text(size = 10),
        plot.title = element_text(hjust = 0.5, size = 12),
        legend.title = element_text(size = 0),
        legend.text = element_text(size = 9.5))
ggsave("figures/fig_h1.pdf", width = 12, height = 12, units = "cm")
#ggsave("figures/LL_UK_ubb.pdf", width = 12, height = 12, units = "cm")

sink()
